Trying to calculate housing stress using 30/40 as in here: https://www.ahuri.edu.au/policy/ahuri-briefs/2016/3040-indicator
2016 Census - Counting Dwellings, Place of Enumeration (SA2)
Data Source: Census of Population and Housing, 2016, TableBuilder
Counting: Dwellings Location on Census Night
Variables:
library(tidyverse)
library(readxl)
library(sf)
library(tmap)
library(janitor)
library(scales)
##original files - 2016 census data - csv string - zeros suppressed
rentdata <- read_csv("sa2_hied_rntrd_2gmel_ zeros_suppressed.csv", skip = 10) %>%
filter(GCCSA == "Greater Melbourne") %>% # gets rid of the rows at the end
select(-X7) #removes the last column
Missing column names filled in: 'X7' [7]Parsed with column specification:
cols(
Counting = [31mcol_character()[39m,
GCCSA = [31mcol_character()[39m,
SA2 = [31mcol_character()[39m,
`HIED Equivalised Total Household Income (weekly)` = [31mcol_character()[39m,
`RNTRD Rent (weekly) Ranges` = [31mcol_character()[39m,
Count = [32mcol_double()[39m,
X7 = [31mcol_character()[39m
)
49099 parsing failures.
row col expected actual file
1 -- 7 columns 6 columns 'sa2_hied_rntrd_2gmel_ zeros_suppressed.csv'
2 -- 7 columns 6 columns 'sa2_hied_rntrd_2gmel_ zeros_suppressed.csv'
3 -- 7 columns 6 columns 'sa2_hied_rntrd_2gmel_ zeros_suppressed.csv'
4 -- 7 columns 6 columns 'sa2_hied_rntrd_2gmel_ zeros_suppressed.csv'
5 -- 7 columns 6 columns 'sa2_hied_rntrd_2gmel_ zeros_suppressed.csv'
... ... ......... ......... ............................................
See problems(...) for more details.
mortdata <- read_csv("sa2_hied_mrerd_2gmel_ zeros_suppressed.csv", skip = 10) %>%
filter(GCCSA == "Greater Melbourne") %>% # gets rid of the rows at the end
select(-X7) #removes the last column
Missing column names filled in: 'X7' [7]Parsed with column specification:
cols(
Counting = [31mcol_character()[39m,
GCCSA = [31mcol_character()[39m,
SA2 = [31mcol_character()[39m,
`HIED Equivalised Total Household Income (weekly)` = [31mcol_character()[39m,
`MRERD Mortgage Repayments (monthly) Ranges` = [31mcol_character()[39m,
Count = [32mcol_double()[39m,
X7 = [31mcol_character()[39m
)
|===== | 5%
|======== | 8%
|=========== | 11%
|============= | 13%
|=============== | 16% 1 MB
|================== | 19% 1 MB
|==================== | 22% 1 MB
|======================= | 25% 1 MB
|========================== | 27% 1 MB
|============================ | 30% 2 MB
|=============================== | 33% 2 MB
|================================== | 36% 2 MB
|==================================== | 39% 2 MB
|======================================= | 41% 2 MB
|========================================= | 44% 3 MB
|============================================ | 47% 3 MB
|=============================================== | 50% 3 MB
|================================================= | 53% 3 MB
|==================================================== | 55% 3 MB
|======================================================= | 58% 4 MB
|========================================================= | 61% 4 MB
|============================================================ | 64% 4 MB
|=============================================================== | 67% 4 MB
|================================================================= | 70% 4 MB
|==================================================================== | 72% 5 MB
|======================================================================= | 75% 5 MB
|========================================================================= | 78% 5 MB
|============================================================================ | 81% 5 MB
|============================================================================== | 83% 5 MB
|================================================================================= | 86% 6 MB
|==================================================================================== | 89% 6 MB
|====================================================================================== | 92% 6 MB
|========================================================================================= | 95% 6 MB
|=========================================================================================== | 97% 6 MB
|==============================================================================================| 100% 7 MB
59701 parsing failures.
row col expected actual file
1 -- 7 columns 6 columns 'sa2_hied_mrerd_2gmel_ zeros_suppressed.csv'
2 -- 7 columns 6 columns 'sa2_hied_mrerd_2gmel_ zeros_suppressed.csv'
3 -- 7 columns 6 columns 'sa2_hied_mrerd_2gmel_ zeros_suppressed.csv'
4 -- 7 columns 6 columns 'sa2_hied_mrerd_2gmel_ zeros_suppressed.csv'
5 -- 7 columns 6 columns 'sa2_hied_mrerd_2gmel_ zeros_suppressed.csv'
... ... ......... ......... ............................................
See problems(...) for more details.
rent <- rentdata %>%
rename(sa2 = SA2) %>%
rename(hhincwk = `HIED Equivalised Total Household Income (weekly)`) %>%
rename(rentwk = `RNTRD Rent (weekly) Ranges`) %>%
mutate(hhincwk = as_factor(hhincwk), rentwk = as_factor(rentwk)) %>%
rename(hh = Count) %>%
select(-Counting)
rent
rent_dw <- rent %>%
filter(hhincwk != 'Not applicable') %>%
filter(rentwk != 'Not applicable') %>%
group_by(sa2) %>%
summarise(dw = sum(hh))
rent_dw #improve this by just getting the dwelling data from the datapacks and joining them on at the end -G32
rent_hh <- rent %>%
filter(!hhincwk %in% c('Nil income', 'Partial income stated', 'All incomes not stated', 'Not applicable')) %>%
filter(!rentwk %in% c('Not stated', 'Not applicable' ))
rent_hh
mort <- mortdata %>%
rename(sa2 = SA2) %>%
rename(hhincwk = `HIED Equivalised Total Household Income (weekly)`) %>%
rename(mortmth = `MRERD Mortgage Repayments (monthly) Ranges`) %>%
mutate(hhincwk = as_factor(hhincwk), mortmth = as_factor(mortmth)) %>%
rename(hh = Count) %>%
select(-Counting)
mort
mort_dw <- mort %>%
filter(hhincwk != 'Not applicable') %>%
filter(mortmth != 'Not applicable') %>%
group_by(sa2) %>%
summarise(dw = sum(hh))
mort_dw #as noted above
mort_hh <- mort %>%
filter(!hhincwk %in% c('Nil income', 'Partial income stated', 'All incomes not stated', 'Not applicable')) %>%
filter(!mortmth %in% c('Not stated', 'Not applicable'))
mort_hh
hhincwk_mid <- rentdata %>%
rename(sa2 = SA2) %>%
rename(hhincwk = `HIED Equivalised Total Household Income (weekly)`) %>%
mutate(hhincwk = as_factor(hhincwk)) %>%
distinct(hhincwk) %>%
filter(!hhincwk %in% c('Nil income', 'Partial income stated', 'All incomes not stated', 'Not applicable', 'Total')) %>%
mutate(inc_low = c(1, 150, 300, 400, 500, 650, 800, 1000, 1250, 1500, 1750, 2000, 2500, 3000)) %>%
mutate(inc_high = c(149, 299, 399, 499, 649, 799, 999, 1249, 1499, 1749, 1999, 2499, 2999, 3499)) %>%
mutate(inc_mid = (inc_low+inc_high)/2) %>%
select(hhincwk, inc_mid)
hhincwk_mid
Filter to include only bottom 40 per cent of households by income
Equivalised disposable household income - P40 = $714 in 2015-16 for VIC - http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6523.02015-16?OpenDocument
hhincwk_mid40 <- hhincwk_mid %>%
filter(inc_mid <= 714)
hhincwk_mid40
rentwk_mid <- rentdata %>%
rename(rentwk = `RNTRD Rent (weekly) Ranges`) %>%
distinct(rentwk) %>%
mutate(rentwk = as_factor(rentwk)) %>%
filter(!rentwk %in% c('Not stated', 'Not applicable', 'Total' )) %>%
mutate(rent_low = c(0, 1, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400, 425, 450, 550, 650, 750, 850, 950)) %>%
mutate(rent_high = c(0, 74, 99, 124, 149, 174, 199, 224, 249, 274, 299, 324, 349, 374, 399, 424, 449, 549, 649, 749, 849, 949, 1050)) %>%
mutate(rent_mid = (rent_low + rent_high)/2) %>%
select(rentwk, rent_mid)
rentwk_mid
mortwk_mid <- mortdata %>%
rename(mortmth = `MRERD Mortgage Repayments (monthly) Ranges`) %>%
distinct(mortmth) %>%
mutate(mortmth = as_factor(mortmth)) %>%
filter(!mortmth %in% c('Not stated', 'Not applicable')) %>%
mutate(mort_low = c(0, 1, 150, 300, 450, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 3000, 4000, 5000)) %>%
mutate(mortwk_low = mort_low/4) %>%
mutate(mort_high = c(0, 149, 299, 449, 599, 799, 999, 1199, 1399, 1599, 1799, 1999, 2199, 2399, 2599, 2999, 3999, 4999, 5999)) %>%
mutate(mortwk_high = mort_high/4) %>%
mutate(mort_mid = (mort_low + mort_high)/2) %>%
mutate(mortwk_mid = (mortwk_low + mortwk_high)/2) %>%
select(mortmth, mort_mid, mortwk_mid)
mortwk_mid
#joining the data ####
rent_join <- left_join(hhincwk_mid40, rent_hh)
Joining, by = "hhincwk"
rent_join <- left_join(rent_join, rentwk_mid)
Joining, by = "rentwk"
rent_join
mort_join <- left_join(hhincwk_mid40, mort_hh)
Joining, by = "hhincwk"
mort_join <- left_join(mort_join, mortwk_mid)
Joining, by = "mortmth"
mort_join
rentalstress <- rent_join %>%
mutate(prophousing = rent_mid/inc_mid) %>%
mutate(hstress = ifelse(prophousing <= 0.3, 0, hh)) %>%
group_by(sa2) %>%
summarise(lowinc_hh = sum(hh), hstress = sum(hstress))
rentalstress <- left_join(rentalstress, rent_dw, by = "sa2") %>%
select(sa2, dw, lowinc_hh, hstress) %>%
mutate(propstress = hstress/dw)
rentalstress
mortgagestress <- mort_join %>%
mutate(prophousing = mortwk_mid/inc_mid) %>%
mutate(hstress = ifelse(prophousing <= 0.3, 0, hh)) %>%
group_by(sa2) %>%
summarise(lowinc_hh = sum(hh), hstress = sum(hstress))
mortgagestress <- left_join(mortgagestress, mort_dw, by = "sa2") %>%
select(sa2, dw, lowinc_hh, hstress) %>%
mutate(propstress = hstress/dw)
mortgagestress
housingstress_join <- full_join(rentalstress, mortgagestress)
Joining, by = c("sa2", "dw", "lowinc_hh", "hstress", "propstress")
housingstress_sa2 <- housingstress_join %>%
group_by(sa2) %>%
summarise(dw = sum(dw), lowinc_hh = sum(lowinc_hh), hstress = sum(hstress)) %>%
mutate(prop_stress = hstress/dw) %>%
filter(!is.na(prop_stress))
housingstress_sa2
write_csv(housingstress_sa2, "tables_out/housingstress_sa2.csv")
gm_shp <- st_read("shp/SA2_2016_GMEL.shp") %>%
clean_names() %>%
select(sa2_name16) %>%
left_join(housingstress_sa2, by = c("sa2_name16" = "sa2")) %>%
mutate(prop_stress = round(prop_stress, 2))
Reading layer `SA2_2016_GMEL' from data source `C:\Users\willi\Desktop\R\HousingAffordabilityData_2016Census\shp\SA2_2016_GMEL.shp' using driver `ESRI Shapefile'
Simple feature collection with 309 features and 12 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: 144.3336 ymin: -38.50299 xmax: 145.8784 ymax: -37.1751
epsg (SRID): NA
proj4string: +proj=longlat +ellps=GRS80 +no_defs
Column `sa2_name16`/`sa2` joining factor and character vector, coercing into character vector
tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(gm_shp) +
tm_polygons("prop_stress")